1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400
class Colorize(object):
    """Colorize text alpha masks and composite them onto a background image.

    The public entry point is :meth:`color`; the remaining methods build the
    individual layers (text, border, drop shadow, background) and merge them.
    """

    def __init__(self, model_dir='data'):
        """Load the font-color model and set the effect probabilities.

        model_dir : directory that contains ``models/colors_new.cp``.
        """
        self.font_color = FontColor(col_file=osp.join(model_dir, 'models/colors_new.cp'))

        # Per-effect probabilities. Within this class only `p_border` and
        # `p_drop_shadow` are read (see `process`).
        self.p_bevel = 0.05
        self.p_outline = 0.05
        self.p_drop_shadow = 0.15
        self.p_border = 0.15
        self.p_displacement = 0.30
        self.p_texture = 0.0

    def drop_shadow(self, alpha, theta, shift, size, op=0.80):
        """Cast a blurred, shifted shadow of an alpha layer.

        alpha : alpha layer whose shadow needs to be cast
        theta : [0,2pi] -- the shadow direction
        shift : shift in pixels of the shadow
        size  : size of the GaussianBlur filter
        op    : opacity of the shadow (multiplying factor)

        @return : alpha of the shadow layer as uint8
                  (it is assumed that the color is black/white)
        """
        # GaussianBlur is given an odd, positive kernel size.
        if size % 2 == 0:
            size -= 1
        size = max(1, size)

        shadow = cv.GaussianBlur(alpha, (size, size), 0)
        [dx, dy] = shift * np.array([-np.sin(theta), np.cos(theta)])
        shadow = op * sii.shift(shadow, shift=[dx, dy], mode='constant', cval=0)
        # Clip before the cast so an out-of-range opacity cannot wrap around.
        return np.clip(shadow, 0, 255).astype('uint8')

    def border(self, alpha, size, kernel_type='RECT'):
        """Dilate the text alpha to obtain the alpha layer of a border.

        alpha       : alpha layer of the text
        size        : size of the kernel
        kernel_type : one of [rect,ellipse,cross] (case-insensitive)

        @return : alpha layer of the border (color to be added externally).
        Raises KeyError for an unknown kernel_type.
        """
        kdict = {'RECT': cv.MORPH_RECT,
                 'ELLIPSE': cv.MORPH_ELLIPSE,
                 'CROSS': cv.MORPH_CROSS}
        kernel = cv.getStructuringElement(kdict[kernel_type.upper()], (size, size))
        return cv.dilate(alpha, kernel, iterations=1)

    def blend(self, cf, cb, mode='normal'):
        """Blend foreground color `cf` with background color `cb`.

        Placeholder: no blend mode is implemented, so `cb` and `mode` are
        ignored and the foreground color is returned unchanged.
        """
        return cf

    def merge_two(self, fore, back, blend_type=None):
        """Merge a FOREground layer over a BACKground layer.

        ref: https://en.wikipedia.org/wiki/Alpha_compositing
        ref: Chapter 7 (pg. 440 and pg. 444):
             http://partners.adobe.com/public/developer/en/pdf/PDFReference.pdf

        fore, back : objects exposing `.alpha` (0-255) and `.color`
        blend_type : forwarded to `blend`; None means plain alpha blending

        @return : a new Layer with uint8 alpha and color.
        """
        a_f = fore.alpha / 255.0
        a_b = back.alpha / 255.0
        c_f = fore.color
        c_b = back.color

        a_r = a_f + a_b - a_f * a_b
        if blend_type is not None:
            c_blend = self.blend(c_f, c_b, blend_type)
            c_r = (((1 - a_f) * a_b)[:, :, None] * c_b
                   + ((1 - a_b) * a_f)[:, :, None] * c_f
                   + (a_f * a_b)[:, :, None] * c_blend)
        else:
            c_r = (((1 - a_f) * a_b)[:, :, None] * c_b
                   + a_f[:, :, None] * c_f)

        return Layer((255 * a_r).astype('uint8'), c_r.astype('uint8'))

    def merge_down(self, layers, blends=None):
        """Merge a stack of layers, bottom-up, into a single layer.

        layers : [l1,l2,...ln] : a list of LAYER objects.
                 l1 is on the top, ln is the bottom-most layer.
        blends : the type of blend to use for each adjacent pair; should
                 have n-1 entries. Use None for plain alpha blending.
        Note   : (1) it assumes that all the layers are of the SAME SIZE.

        @return : a single LAYER type object representing the merged-down
                  image. A one-element list returns that element as-is.
        Raises IndexError if `layers` is empty.
        """
        nlayers = len(layers)
        if nlayers <= 1:
            return layers[0]

        out_layer = layers[-1]
        # Negative indices walk from the second-to-last layer up to the top;
        # blends[i + 1] is the blend between layers[i] and the layer below it.
        for i in range(-2, -nlayers - 1, -1):
            blend = None if blends is None else blends[i + 1]
            out_layer = self.merge_two(fore=layers[i], back=out_layer,
                                       blend_type=blend)
        return out_layer

    def resize_im(self, im, osize):
        """Resize `im` with bicubic interpolation; `osize` is reversed
        before being handed to PIL's `resize`."""
        return np.array(Image.fromarray(im).resize(osize[::-1], Image.BICUBIC))

    def occlude(self):
        """Somehow add occlusion to text.

        Placeholder: not implemented, does nothing.
        """
        pass

    def color_border(self, col_text, col_bg):
        """Decide on a color for the border.

        One of three strategies is picked at random:
            - the mean text color with its first HSV channel re-sampled,
            - `self.font_color.complement` of the mean text color,
            - `self.font_color.triangle_color` of the mean text and mean
              background colors (a 'mid-way' color b/w text & bg).
        In every case the 'VALUE' channel of the result is then re-sampled.

        col_text : text color image (RGB)
        col_bg   : background color image (RGB); only used by the third
                   strategy

        @return : a single RGB color.
        """
        choice = np.random.choice(3)

        # Mean text color in HSV.
        col_text = cv.cvtColor(col_text, cv.COLOR_RGB2HSV)
        col_text = np.reshape(col_text, (np.prod(col_text.shape[:2]), 3))
        col_text = np.mean(col_text, axis=0).astype('uint8')

        vs = np.linspace(0, 1)

        def get_sample(x):
            """Sample a channel value in [0,255].

            Candidates in `vs` are weighted by their distance from x/255,
            so values far from `x` are more likely; Gaussian jitter is added
            and the result is clipped to [0,1] before scaling.
            """
            ps = np.abs(vs - x / 255.0)
            ps /= np.sum(ps)
            v_rand = np.clip(np.random.choice(vs, p=ps) + 0.1 * np.random.randn(), 0, 1)
            return 255 * v_rand

        if choice == 0:
            col_text[0] = get_sample(col_text[0])
            col_text = np.squeeze(cv.cvtColor(col_text[None, None, :], cv.COLOR_HSV2RGB))
        elif choice == 1:
            col_text = np.squeeze(cv.cvtColor(col_text[None, None, :], cv.COLOR_HSV2RGB))
            col_text = self.font_color.complement(col_text)
        else:
            # Mean background color (averaged in HSV), converted back to RGB.
            col_bg = cv.cvtColor(col_bg, cv.COLOR_RGB2HSV)
            col_bg = np.reshape(col_bg, (np.prod(col_bg.shape[:2]), 3))
            col_bg = np.mean(col_bg, axis=0).astype('uint8')
            col_bg = np.squeeze(cv.cvtColor(col_bg[None, None, :], cv.COLOR_HSV2RGB))
            col_text = np.squeeze(cv.cvtColor(col_text[None, None, :], cv.COLOR_HSV2RGB))
            col_text = self.font_color.triangle_color(col_text, col_bg)

        # Randomize the VALUE channel of the chosen color.
        col_text = np.squeeze(cv.cvtColor(col_text[None, None, :], cv.COLOR_RGB2HSV))
        col_text[2] = get_sample(col_text[2])
        return np.squeeze(cv.cvtColor(col_text[None, None, :], cv.COLOR_HSV2RGB))

    def color_text(self, text_arr, h, bg_arr):
        """Decide on a color for the text.

        The foreground/background color pair is obtained from
        `self.font_color.sample_from_data(bg_arr)`.

        text_arr : alpha mask of the text
        h        : minimum height of a character (currently unused)
        bg_arr   : background image the colors are sampled against

        @return : (text Layer, foreground color, background color)
        """
        fg_col, bg_col = self.font_color.sample_from_data(bg_arr)
        return Layer(alpha=text_arr, color=fg_col), fg_col, bg_col

    @staticmethod
    def _effect_size(min_h):
        """Map the smallest character height (px) to a kernel size."""
        if min_h <= 15:
            return 1
        if min_h < 30:
            return 3
        return 5

    def process(self, text_arr, bg_arr, min_h):
        """Blend the text layer `text_arr` onto the background `bg_arr`.

        text_arr : one alpha mask : nxm, uint8
        bg_arr   : background image: nxmx3, uint8
        min_h    : height of the smallest character (px)

        @return : text_arr blit onto bg_arr. This is the result of
                  `blit_images`; when that returns None the plain
                  alpha-merge of the layers over `bg_arr` is returned.
        """
        l_text, fg_col, bg_col = self.color_text(text_arr, min_h, bg_arr)
        # The flat background layer uses the mean background color,
        # replacing the background color returned by `color_text`.
        bg_col = np.mean(np.mean(bg_arr, axis=0), axis=0)
        l_bg_flat = Layer(alpha=255 * np.ones_like(text_arr, 'uint8'), color=bg_col)

        # Randomly attenuate the text opacity.
        l_text.alpha = l_text.alpha * np.clip(0.88 + 0.1 * np.random.randn(), 0.72, 1.0)
        layers = [l_text]
        blends = []
        bsz = self._effect_size(min_h)

        # Border.
        if np.random.rand() < self.p_border:
            border_a = self.border(l_text.alpha, size=bsz)
            l_border = Layer(border_a, self.color_border(l_text.color, l_bg_flat.color))
            layers.append(l_border)
            blends.append('normal')

        # Drop shadow.
        if np.random.rand() < self.p_drop_shadow:
            # Roughly diagonal direction with some jitter.
            theta = np.pi / 4 * np.random.choice([1, 3, 5, 7]) + 0.5 * np.random.randn()

            if min_h <= 15:
                shift = 2
            elif 15 < min_h < 30:
                shift = 7 + np.random.randn()
            else:
                shift = 15 + 3 * np.random.randn()

            op = 0.50 + 0.1 * np.random.randn()
            shadow = self.drop_shadow(l_text.alpha, theta, shift, 3 * bsz, op)
            l_shadow = Layer(shadow, 0)
            layers.append(l_shadow)
            blends.append('normal')

        layers.append(l_bg_flat)
        blends.append('normal')
        l_normal = self.merge_down(layers, blends)

        # Blit the flat-background rendering onto the real background image.
        l_bg_img = Layer(alpha=255 * np.ones_like(text_arr, 'uint8'), color=bg_arr)
        l_out = blit_images(l_normal.color, l_bg_img.color.copy())

        if l_out is None:
            # No blit result: fall back to a normal blend over the image.
            layers[-1] = l_bg_img
            return self.merge_down(layers, blends).color

        return l_out

    def check_perceptible(self, txt_mask, bg, txt_bg):
        """
        --- DEPRECATED; USE GRADIENT CHECKING IN POISSON-RECONSTRUCT INSTEAD ---

        Checks if the text after merging with background is still visible.

        txt_mask (hxw)   : binary image of text -- 255 where text is present
                           0 elsewhere
        bg (hxwx3)       : original background image WITHOUT any text.
        txt_bg (hxwx3)   : image with text.

        @return : (percentiles [10,30,50,70,90] of the per-pixel Lab color
                  difference inside the mask, (copy of bg, copy of txt_bg))
        """
        bgo, txto = bg.copy(), txt_bg.copy()
        txt_mask = txt_mask.astype('bool')
        bg = cv.cvtColor(bg.copy(), cv.COLOR_RGB2Lab)
        txt_bg = cv.cvtColor(txt_bg.copy(), cv.COLOR_RGB2Lab)

        # Work in float: in-place scaling of integer pixels raises a casting
        # error, and subtracting unsigned integers would wrap around.
        bg_px = bg[txt_mask, :].astype('float64')
        txt_px = txt_bg[txt_mask, :].astype('float64')
        bg_px[:, 0] *= 100.0 / 255.0
        txt_px[:, 0] *= 100.0 / 255.0

        diff = np.linalg.norm(bg_px - txt_px, ord=None, axis=1)
        diff = np.percentile(diff, [10, 30, 50, 70, 90])
        print("color diff percentile :", diff)
        return diff, (bgo, txto)

    def color(self, bg_arr, text_arr, hs, place_order=None, pad=20):
        """Return colorized text image.

        bg_arr      : background image; nxm or nxmx1 inputs are expanded to
                      three channels. The input array is not modified.
        text_arr    : list of (n x m) numpy text alpha masks (uint8).
        hs          : list of minimum heights (scalar) of characters in each
                      text-array.
        place_order : indices into text_arr; masks are rendered in reverse
                      of this order. Defaults to 0..len(text_arr)-1.
        pad         : number of pixels of context kept around each text
                      patch (reduced near the canvas edges).

        @return : nxmx3 rgb image with every text mask rendered onto it.
        """
        bg_arr = bg_arr.copy()
        if bg_arr.ndim == 2:
            bg_arr = np.repeat(bg_arr[:, :, None], 3, 2)
        elif bg_arr.shape[2] == 1:
            # Already has a channel axis; adding another would yield 4-D.
            bg_arr = np.repeat(bg_arr, 3, 2)

        canvas_sz = np.array(bg_arr.shape[:2])

        if place_order is None:
            place_order = np.arange(len(text_arr))

        for i in place_order[::-1]:
            # Tight bounding box of the text in the mask.
            loc = np.where(text_arr[i])
            if loc[0].size == 0:
                # Empty mask: nothing to render.
                continue
            lx, ly = np.min(loc[0]), np.min(loc[1])
            mx, my = np.max(loc[0]), np.max(loc[1])
            l = np.array([lx, ly])
            m = np.array([mx, my]) - l + 1
            text_patch = text_arr[i][l[0]:l[0] + m[0], l[1]:l[1] + m[1]]

            # Pad the patch, limited by the room left on each canvas side.
            ext = canvas_sz - (l + m)
            num_pad = pad * np.ones(4, dtype='int32')
            num_pad[:2] = np.minimum(num_pad[:2], l)
            num_pad[2:] = np.minimum(num_pad[2:], ext)
            text_patch = np.pad(text_patch,
                                pad_width=((num_pad[0], num_pad[2]),
                                           (num_pad[1], num_pad[3])),
                                mode='constant')
            l -= num_pad[:2]

            w, h = text_patch.shape
            bg = bg_arr[l[0]:l[0] + w, l[1]:l[1] + h, :]

            # Render onto the running canvas so later masks see earlier text.
            bg_arr[l[0]:l[0] + w, l[1]:l[1] + h, :] = self.process(text_patch, bg, hs[i])

        return bg_arr
|